package com.answer.utils;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Helpers for filtering HTML markup out of page content.
 *
 * @author Administrator
 */
public class HtmlUtil {
    /**
     * Returns the given string unchanged.
     *
     * <p>The name suggests HTML special-character escaping, but no escaping is
     * performed by this method.
     *
     * @param str the string to pass through; may be {@code null}
     * @return {@code str} itself, unmodified
     */
    public static String hsc(String str) {
        // NOTE(review): this method previously contained (commented-out) replacements of
        // '&', '<', '>' and '"' with their HTML entities. They are intentionally left
        // disabled here so existing callers keep seeing the same output; callers that
        // render the result as HTML must escape it themselves. Confirm whether escaping
        // should be re-enabled.
        return str;
    }

    /**
     * Extracts the plain text of an HTML fragment and normalizes its whitespace.
     *
     * <p>The markup is parsed with jsoup and the document text is taken. Every
     * character that is a Unicode space character or Java whitespace is treated as
     * a plain space, runs of such characters are collapsed to a single space, and
     * the result is trimmed.
     *
     * @param htmlStr the HTML to convert; {@code null} is treated as empty input
     * @return the normalized text, or an empty string when {@code htmlStr} is {@code null}
     */
    public static String getTextFromTHML(String htmlStr) {
        if (htmlStr == null) {
            // Nothing to parse; avoid handing null to the parser.
            return "";
        }

        Document doc = Jsoup.parse(htmlStr);
        return collapseWhitespace(doc.text());
    }

    /**
     * Replaces each run of space/whitespace characters with one space and trims the result.
     */
    private static String collapseWhitespace(String text) {
        StringBuilder out = new StringBuilder(text.length());
        boolean previousWasSpace = false;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            // isSpaceChar covers Unicode separators (e.g. non-breaking space), while
            // isWhitespace covers tabs and line breaks; either one counts as a space.
            if (Character.isSpaceChar(c) || Character.isWhitespace(c)) {
                if (!previousWasSpace) {
                    out.append(' ');
                }
                previousWasSpace = true;
            } else {
                out.append(c);
                previousWasSpace = false;
            }
        }
        return out.toString().trim();
    }

    /**
     * Small manual demo: prints the text extracted from a sample HTML paragraph.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String a = "<p style=\"white-space: normal;\">今天我们来讲下如何用Spring来整合CXF，来发布WebService服务；<br/></p>";
        System.out.println(getTextFromTHML(a));
    }
}
